### Cumulative Prospect Theory (5 parameter model) function library
## Appendix Theory: A.4.2


MaxLikeSR_CPT_5param_LOO_crossVal = function(data, sub_sample, init){
  # Leave-one-out Cross Validation (LOO CV)
  start <- Sys.time()  
  data = sample_SR(data,sub_sample)
  index = data$studentID                                                            
  data = split(data,f = index)  
  
  dfs_LOOCV = lapply(data, LOOCV_train_test_split)
  results = lapply(dfs_LOOCV, LOO_crossVal_train_predict_CPT_5param, init)
  
  loss_coef = unlist(lapply(results, {function(x) mean(x[["loss.coef"]])}), use.names = FALSE)
  loss_coef_std = unlist(lapply(results, {function(x) sqrt(var(x[["loss.coef"]]))}), use.names = FALSE)
  
  alpha_coef = unlist(lapply(results, {function(x) mean(x[["alpha.coef"]])}), use.names = FALSE)
  alpha_coef_std = unlist(lapply(results, {function(x) sqrt(var(x[["alpha.coef"]]))}), use.names = FALSE)
  
  beta_coef = unlist(lapply(results, {function(x) mean(x[["beta.coef"]])}), use.names = FALSE)
  beta_coef_std = unlist(lapply(results, {function(x) sqrt(var(x[["beta.coef"]]))}), use.names = FALSE)
  
  gamma_coef = unlist(lapply(results, {function(x) mean(x[["gamma.coef"]])}), use.names = FALSE)
  gamma_coef_std = unlist(lapply(results, {function(x) sqrt(var(x[["gamma.coef"]]))}), use.names = FALSE)
  
  delta_coef = unlist(lapply(results, {function(x) mean(x[["delta.coef"]])}), use.names = FALSE)
  delta_coef_std = unlist(lapply(results, {function(x) sqrt(var(x[["delta.coef"]]))}), use.names = FALSE)
  
  score = unlist(lapply(results, {function(x) sum(x$accuracy)}), use.names = FALSE)
  results_scores = as.data.frame(cbind(studentID = unique(index), score = score, loss_mean = loss_coef, alpha_mean = alpha_coef, beta_mean = beta_coef, 
                                       gamma_mean = gamma_coef, delta_mean = delta_coef, 
                                       loss_std = loss_coef_std,  alpha_std = alpha_coef_std, beta_std = beta_coef_std,
                                       gamma_std = gamma_coef_std, delta_std = delta_coef_std))
  
  print("Cumulative Prospect Theory (5 param model) - Leave-One-Out Test Accuracy Summary:")
  print(paste("Average:", mean(results_scores$score)))
  print(paste("Standard error:", sqrt(var(results_scores$score)/length(results_scores$score))))
  
  end = Sys.time()                                                                  
  timer = difftime(end,start,units = "secs")
  print(paste("Time taken for MLE algorithim and Cross-Validation:",seconds_to_period(round(timer[[1]],2))))
  return(list(estimations = results, summary = results_scores))
}



LOOCV_train_test_split = function(data){
  result = list()
  for(i in 1:length(data$qnum)){
    test_df = data[i, ]
    train_df = data[-i, ]
    result[[i]] <- list(train = train_df, test = test_df)
  }
  result
}



LOO_crossVal_train_predict_CPT_5param = function(LOO_train_test_folds, init){
  results = bind_rows(lapply(LOO_train_test_folds, LOO_train_test_CPT_5param, init))
  results
}



LOO_train_test_CPT_5param = function(train_test_dfs, init){
  train_df = train_test_dfs[["train"]]
  test_df = train_test_dfs[["test"]]
  
  results = wrapMLE_CPT_5param(train_df, init, log_lik_CPT_5param)
  results_summary = stat_sum_CPT_5param(results)
  results_summary$studentID = test_df$studentID
  
  param = list(loss = results_summary$loss.coef, alpha = results_summary$alpha.coef, beta = results_summary$beta.coef,
               gamma = results_summary$gamma.coef, delta = results_summary$delta.coef)
  test_predict_results = LOO_test_predict(test_df, param, calc_util_CPT_5param)
  results_summary = cbind(results_summary, test_qnum = test_df$qnum, test_choice = test_df$choice, test_predict_results)
  results_summary
}




LOO_test_predict = function(test_df, param, util_fun){
  lottery_data = list(l1 = list(prob = test_df[["prob.1"]], lower = test_df[["lower.1"]], upper = test_df[["upper.1"]]),
                      l2 = list(prob = test_df[["prob.2"]], lower = test_df[["lower.2"]], upper = test_df[["upper.2"]]),
                      l3 = list(prob = test_df[["prob.3"]], lower = test_df[["lower.3"]], upper = test_df[["upper.3"]]),
                      l4 = list(prob = test_df[["prob.4"]], lower = test_df[["lower.4"]], upper = test_df[["upper.4"]]),
                      l5 = list(prob = test_df[["prob.5"]], lower = test_df[["lower.5"]], upper = test_df[["upper.5"]]))
  
  lottery_utility = unlist(lapply(lottery_data, util_fun, param))
  predicted_choice = unname(which.max(lottery_utility))
  accuracy = if_else(predicted_choice == test_df$choice, 1, 0)
  list(pred_choice = predicted_choice, accuracy = accuracy)
}


calc_util_CPT_5param = function(df, param){
  w_p(param[["delta"]],df[["prob"]])*((df[["upper"]])^param[["alpha"]]) + 
    w_p(param[["gamma"]],1-df[["prob"]])*(-(-df[["lower"]])^param[["beta"]])*param[["loss"]]
}

w_p <- function(param, prob){
  (prob^param)/(((prob^param + (1-prob)^param))^(1/param))
}


wrapMLE_CPT_5param = function(data, init, logLikFun){
  CPT_maxLik_res = mle2(minuslogl = logLikFun, data = list(data=data), start = init,
                        method="L-BFGS-B", optimizer = "optim",
                        lower = c(loss=1, alpha=0.01, beta=0.01, gamma=0.29, delta=0.29),
                        upper = c(loss=3, alpha=1, beta=1, gamma=1, delta=1))
  CPT_maxLik_res
}



stat_sum_CPT_5param = function(stat){
  results = data.frame("studentID"=NA, "loss.coef"=NA, "alpha.coef" = NA, "beta.coef" = NA, "gamma.coef" = NA, "delta.coef" = NA,
                       "std.loss" = NA, "std.alpha" = NA, "std.beta" = NA, "std.gamma" = NA, "std.delta" = NA,
                       "p.val.loss"=NA, "p.val.alpha"=NA, "p.val.beta"=NA, "p.val.gamma"=NA, "p.val.delta"=NA,
                       "logLikelihood"=NA,"AIC"=NA)
  
  summary = summary(stat)
  results["studentID"]       = NA
  results["loss.coef"]       = summary@coef[1,1]
  results["alpha.coef"]      = summary@coef[2,1]
  results["beta.coef"]       = summary@coef[3,1]
  results["gamma.coef"]      = summary@coef[4,1]
  results["delta.coef"]      = summary@coef[5,1]
  results["std.loss"]        = summary@coef[1,2]
  results["std.alpha"]       = summary@coef[2,2]
  results["std.beta"]        = summary@coef[3,2]
  results["std.gamma"]       = summary@coef[4,2]
  results["std.delta"]       = summary@coef[5,2]
  results["p.val.loss"]      = summary@coef[1,4]
  results["p.val.alpha"]     = summary@coef[2,4]
  results["p.val.beta"]      = summary@coef[3,4]
  results["p.val.gamma"]     = summary@coef[4,4]
  results["p.val.delta"]     = summary@coef[5,4]
  results["logLikelihood"] = logLik(stat)
  results["AIC"]  = AIC(stat)
  results
}



log_lik_CPT_5param <- function(loss, alpha, beta, gamma, delta, data){
  # lower bound needs the two minus signs cause R prioritizes raising to the power
  # and hence tries to raise a negative integer to the power of a fraction (results in error)
  # calculating denominator of likelihood of a choice
  denomCalc = function(data){
    exp(w_p(delta,data[["prob.1"]])*(data[["upper.1"]]^alpha) + w_p(gamma,1-data[["prob.1"]])*(-(-data[["lower.1"]])^beta)*loss) +
      exp(w_p(delta,data[["prob.2"]])*(data[["upper.2"]]^alpha) + w_p(gamma,1-data[["prob.2"]])*(-(-data[["lower.2"]])^beta)*loss) +
      exp(w_p(delta,data[["prob.3"]])*(data[["upper.3"]]^alpha) + w_p(gamma,1-data[["prob.3"]])*(-(-data[["lower.3"]])^beta)*loss) +
      exp(w_p(delta,data[["prob.4"]])*(data[["upper.4"]]^alpha) + w_p(gamma,1-data[["prob.4"]])*(-(-data[["lower.4"]])^beta)*loss) +
      exp(w_p(delta,data[["prob.5"]])*(data[["upper.5"]]^alpha) + w_p(gamma,1-data[["prob.5"]])*(-(-data[["lower.5"]])^beta)*loss)
  }
  
  denomRes = apply(data, 1, denomCalc) #vector of denominator components for each student
  
  # calculate numerator of likelihood of a choice
  numerCalc = function(data){
    j = data[["choice"]]
    numercalc = exp(w_p(delta,data[[13+j]])*(data[[8+j]]^alpha) + w_p(gamma,1-data[[13+j]])*(-(-data[[3+j]])^beta)*loss)
    numercalc
  }
  
  numerRes = apply(data, 1, numerCalc)
  
  #calculating log likelihood
  log_lik_i <-  log(numerRes/denomRes)
  
  -sum(unlist(log_lik_i))
}




sample_SR = function(data,sub_sample){
  if(sub_sample == "full"){data = data}
  if(sub_sample == "part1"){data = data %>% filter(qnum<19)}
  if(sub_sample == "part2"){data = data %>%  filter(qnum>18)}
  return(data)
}






















